/* Copyright 2009-2016 David Hadka
 *
 * This file is part of the MOEA Framework.
 *
 * The MOEA Framework is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or (at your
 * option) any later version.
 *
 * The MOEA Framework is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with the MOEA Framework.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.moeaframework.analysis.sensitivity;

import java.io.File;
import java.io.IOException;
import java.io.PrintStream;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.math3.stat.StatUtils;
import org.moeaframework.core.PRNG;
import org.moeaframework.util.CommandLineUtility;

/**
 * Global sensitivity analysis of blackbox model output using Saltelli's
 * improved Sobol' global variance decomposition procedure.
 * <p>
 * The following code was derived and translated from the C code used in the
 * study cited below. Refer to this article for a description of the procedure.
 * <p>
 * References:
 * <ol>
 * <li>Tang, Y., Reed, P., Wagener, T., and van Werkhoven, K., "Comparing
 * Sensitivity Analysis Methods to Advance Lumped Watershed Model Identification
 * and Evaluation," Hydrology and Earth System Sciences, vol. 11, no. 2, pp.
 * 793-817, 2007.
 * <li>Saltelli, A., et al. "Global Sensitivity Analysis: The Primer." John
 * Wiley & Sons Ltd, 2008.
 * </ol>
 */
public class SobolAnalysis extends CommandLineUtility {

	/**
	 * Number of resamples used to bootstrap the 50% confidence intervals.
	 */
	private int resamples = 1000;

	/**
	 * Parameters being analyzed.
	 */
	private ParameterFile parameterFile;

	/**
	 * Number of parameters.
	 */
	private int P;

	/**
	 * Number of samples.
	 */
	private int N;

	/**
	 * Index of the metric being evaluated.
	 */
	private int index;

	/**
	 * Output from the original parameters.
	 */
	private double[] A;

	/**
	 * Output from the resampled parameters.
	 */
	private double[] B;

	/**
	 * Output from the original samples where the j-th parameter is replaced by
	 * the corresponding resampled parameter.
	 */
	private double[][] C_A;

	/**
	 * Output from the resampled samples where the j-th parameter is replaced by
	 * the corresponding original parameter.
	 */
	private double[][] C_B;

	/**
	 * Constructs the command line utility for global sensitivity analysis
	 * using Sobol's global variance decomposition based on Saltelli's work.
	 */
	public SobolAnalysis() {
		super();
	}

	/**
	 * Loads the outputs from the file. Each line in the file must contain the
	 * output produced using the parameters generated by SobolSequence.
	 * 
	 * @param file the model output file
	 * @throws IOException if an I/O error occurred
	 */
	private void load(File file) throws IOException {
		MatrixReader reader = null;
		
		try {
			reader = new MatrixReader(file);

			A = new double[N];
			B = new double[N];
			C_A = new double[N][P];
			C_B = new double[N][P];

			for (int i = 0; i < N; i++) {
				A[i] = reader.next()[index];

				for (int j = 0; j < P; j++) {
					C_A[i][j] = reader.next()[index];
				}

				for (int j = 0; j < P; j++) {
					C_B[i][j] = reader.next()[index];
				}

				B[i] = reader.next()[index];
			}
		} finally {
			if (reader != null) {
				reader.close();
			}
		}
	}

	/**
	 * Computes and displays the first-, total-, and second- order Sobol'
	 * sensitivities and 50% bootstrap confidence intervals.
	 * 
	 * @param output the output stream
	 */
	private void display(PrintStream output) {
		output.println("Parameter	Sensitivity [Confidence]");

		output.println("First-Order Effects");
		for (int j = 0; j < P; j++) {
			double[] a0 = new double[N];
			double[] a1 = new double[N];
			double[] a2 = new double[N];

			for (int i = 0; i < N; i++) {
				a0[i] = A[i];
				a1[i] = C_A[i][j];
				a2[i] = B[i];
			}

			output.print("  ");
			output.print(parameterFile.get(j).getName());
			output.print(' ');
			output.print(computeFirstOrder(a0, a1, a2, N));
			output.print(" [");
			output.print(computeFirstOrderConfidence(a0, a1, a2, N, resamples));
			output.println(']');
		}

		output.println("Total-Order Effects");
		for (int j = 0; j < P; j++) {
			double[] a0 = new double[N];
			double[] a1 = new double[N];
			double[] a2 = new double[N];

			for (int i = 0; i < N; i++) {
				a0[i] = A[i];
				a1[i] = C_A[i][j];
				a2[i] = B[i];
			}

			output.print("  ");
			output.print(parameterFile.get(j).getName());
			output.print(' ');
			output.print(computeTotalOrder(a0, a1, a2, N));
			output.print(" [");
			output.print(computeTotalOrderConfidence(a0, a1, a2, N, resamples));
			output.println(']');
		}

		output.println("Second-Order Effects");
		for (int j = 0; j < P; j++) {
			for (int k = j + 1; k < P; k++) {
				double[] a0 = new double[N];
				double[] a1 = new double[N];
				double[] a2 = new double[N];
				double[] a3 = new double[N];
				double[] a4 = new double[N];

				for (int i = 0; i < N; i++) {
					a0[i] = A[i];
					a1[i] = C_B[i][j];
					a2[i] = C_A[i][k];
					a3[i] = C_A[i][j];
					a4[i] = B[i];
				}

				output.print("  ");
				output.print(parameterFile.get(j).getName());
				output.print(" * ");
				output.print(parameterFile.get(k).getName());
				output.print(' ');
				output.print(computeSecondOrder(a0, a1, a2, a3, a4, N));
				output.print(" [");
				output.print(computeSecondOrderConfidence(a0, a1, a2, a3, a4, N,
								resamples));
				output.println(']');
			}
		}
	}

	/**
	 * Computes and displays the first- and total-order Sobol' sensitivites and
	 * 50% bootstrap confidence intervals.
	 * 
	 * @param output the output stream
	 */
	private void displaySimple(PrintStream output) {
		output.println("First-Order Effects");
		for (int j = 0; j < P; j++) {
			double[] a0 = new double[N];
			double[] a1 = new double[N];
			double[] a2 = new double[N];

			for (int i = 0; i < N; i++) {
				a0[i] = A[i];
				a1[i] = C_A[i][j];
				a2[i] = B[i];
			}

			double value = computeFirstOrder(a0, a1, a2, N);
			output.print(value < 0 ? 0.0 : value);

			if (j < P - 1) {
				output.print('\t');
			}
		}

		output.println();
		output.println("Total-Order Effects");
		for (int j = 0; j < P; j++) {
			double[] a0 = new double[N];
			double[] a1 = new double[N];
			double[] a2 = new double[N];

			for (int i = 0; i < N; i++) {
				a0[i] = A[i];
				a1[i] = C_A[i][j];
				a2[i] = B[i];
			}

			double value = computeTotalOrder(a0, a1, a2, N);
			output.print(value < 0 ? 0.0 : value);

			if (j < P - 1) {
				output.print('\t');
			}
		}

		output.println();
	}

	/**
	 * Returns the first-order confidence interval of the i-th parameter.  The
	 * arguments to this method mirror the arguments to
	 * {@link #computeFirstOrder}.
	 * 
	 * @param a0 the output from the first independent samples
	 * @param a1 the output from the samples produced by swapping the i-th
	 *        parameter in the first independent samples with the i-th parameter
	 *        from the second independent samples
	 * @param a2 the output from the second independent samples
	 * @param nsample the number of samples
	 * @param nresample the number of resamples used when calculating the
	 *        confidence interval
	 * @return the first-order confidence interval of the i-th parameter
	 */
	private static double computeFirstOrderConfidence(double[] a0, double[] a1,
			double[] a2, int nsample, int nresample) {
		double[] b0 = new double[nsample];
		double[] b1 = new double[nsample];
		double[] b2 = new double[nsample];
		double[] s = new double[nresample];

		for (int i = 0; i < nresample; i++) {
			for (int j = 0; j < nsample; j++) {
				int index = PRNG.nextInt(nsample);

				b0[j] = a0[index];
				b1[j] = a1[index];
				b2[j] = a2[index];
			}

			s[i] = computeFirstOrder(b0, b1, b2, nsample);
		}

		double ss = StatUtils.sum(s) / nresample;
		double sss = 0.0;
		
		for (int i = 0; i < nresample; i++) {
			sss += Math.pow(s[i] - ss, 2.0);
		}

		return 1.96 * Math.sqrt(sss / (nresample - 1));
	}

	/**
	 * Returns the first-order sensitivity of the i-th parameter.  Note how
	 * the contents of the array {@code a1} specify the parameter being
	 * analyzed.
	 * 
	 * @param a0 the output from the first independent samples
	 * @param a1 the output from the samples produced by swapping the i-th
	 *        parameter in the first independent samples with the i-th parameter
	 *        from the second independent samples
	 * @param a2 the output from the second independent samples
	 * @param nsample the number of samples
	 * @return the first-order sensitivity of the i-th parameter
	 */
	private static double computeFirstOrder(double[] a0, double[] a1,
			double[] a2, int nsample) {
		double c = 0.0;
		for (int i = 0; i < nsample; i++) {
			c += a0[i];
		}
		c /= nsample;

		double tmp1 = 0.0;
		double tmp2 = 0.0;
		double tmp3 = 0.0;
		double EY2 = 0.0;

		for (int i = 0; i < nsample; i++) {
			EY2 += (a0[i] - c) * (a2[i] - c);
			tmp1 += (a2[i] - c) * (a2[i] - c);
			tmp2 += (a2[i] - c);
			tmp3 += (a1[i] - c) * (a2[i] - c);
		}

		EY2 /= nsample;

		double V = (tmp1 / (nsample - 1)) - Math.pow(tmp2 / nsample, 2.0);
		double U = tmp3 / (nsample - 1);

		return (U - EY2) / V;
	}

	/**
	 * Returns the total-order sensitivity of the i-th parameter.  Note how
	 * the contents of the array {@code a1} specify the parameter being
	 * analyzed.
	 * 
	 * @param a0 the output from the first independent samples
	 * @param a1 the output from the samples produced by swapping the i-th
	 *        parameter in the first independent samples with the i-th parameter
	 *        from the second independent samples
	 * @param a2 the output from the second independent samples
	 * @param nsample the number of samples
	 * @return the total-order sensitivity of the i-th parameter
	 */
	private static double computeTotalOrder(double[] a0, double[] a1,
			double[] a2, int nsample) {
		double c = 0.0;
		
		for (int i = 0; i < nsample; i++) {
			c += a0[i];
		}
		
		c /= nsample;

		double tmp1 = 0.0;
		double tmp2 = 0.0;
		double tmp3 = 0.0;

		for (int i = 0; i < nsample; i++) {
			tmp1 += (a0[i] - c) * (a0[i] - c);
			tmp2 += (a0[i] - c) * (a1[i] - c);
			tmp3 += (a0[i] - c);
		}

		double EY2 = Math.pow(tmp3 / nsample, 2.0);
		double V = (tmp1 / (nsample - 1)) - EY2;
		double U = tmp2 / (nsample - 1);

		return 1.0 - ((U - EY2) / V);
	}

	/**
	 * Returns the total-order confidence interval of the i-th parameter.  The
	 * arguments to this method mirror the arguments to
	 * {@link #computeTotalOrder}.
	 * 
	 * @param a0 the output from the first independent samples
	 * @param a1 the output from the samples produced by swapping the i-th
	 *        parameter in the first independent samples with the i-th parameter
	 *        from the second independent samples
	 * @param a2 the output from the second independent samples
	 * @param nsample the number of samples
	 * @param nresample the number of resamples used when calculating the
	 *        confidence interval
	 * @return the total-order confidence interval of the i-th parameter
	 */
	private static double computeTotalOrderConfidence(double[] a0, double[] a1,
			double[] a2, int nsample, int nresample) {
		double[] b0 = new double[nsample];
		double[] b1 = new double[nsample];
		double[] b2 = new double[nsample];
		double[] s = new double[nresample];

		for (int i = 0; i < nresample; i++) {
			for (int j = 0; j < nsample; j++) {
				int index = PRNG.nextInt(nsample);

				b0[j] = a0[index];
				b1[j] = a1[index];
				b2[j] = a2[index];
			}

			s[i] = computeTotalOrder(b0, b1, b2, nsample);
		}

		double ss = StatUtils.sum(s) / nresample;
		double sss = 0.0;
		
		for (int i = 0; i < nresample; i++) {
			sss += Math.pow(s[i] - ss, 2.0);
		}

		return 1.96 * Math.sqrt(sss / (nresample - 1));
	}

	/**
	 * Returns the second-order sensitivity of the i-th and j-th parameters.  
	 * Note how the contents of the arrays {@code a1}, {@code a2}, and
	 * {@code a3} specify the two parameters being analyzed.
	 * 
	 * @param a0 the output from the first independent samples
	 * @param a1 the output from the samples produced by swapping the i-th
	 *        parameter in the second independent samples with the i-th
	 *        parameter from the first independent samples
	 * @param a2 the output from the samples produced by swapping the j-th
	 *        parameter in the first independent samples with the j-th parameter
	 *        from the second independent samples
	 * @param a3 the output from the samples produced by swapping the i-th
	 *        parameter in the first independent samples with the i-th parameter
	 *        from the second independent samples
	 * @param a4 the output from the second independent samples
	 * @param nsample the number of samples
	 * @param nresample the number of resamples used when calculating the
	 *        confidence interval
	 * @return the second-order sensitivity of the i-th and j-th parameters
	 */
	private static double computeSecondOrder(double[] a0, double[] a1,
			double[] a2, double[] a3, double[] a4, int nsample) {
		double c = 0.0;
		
		for (int i = 0; i < nsample; i++) {
			c += a0[i];
		}
		
		c /= nsample;

		double EY = 0.0;
		double EY2 = 0.0;
		double tmp1 = 0.0;
		double tmp2 = 0.0;
		double tmp3 = 0.0;
		double tmp4 = 0.0;
		double tmp5 = 0.0;

		for (int i = 0; i < nsample; i++) {
			EY += (a0[i] - c) * (a4[i] - c);
			EY2 += (a1[i] - c) * (a3[i] - c);
			tmp1 += (a1[i] - c) * (a1[i] - c);
			tmp2 += (a1[i] - c);
			tmp3 += (a1[i] - c) * (a2[i] - c);
			tmp4 += (a2[i] - c) * (a4[i] - c);
			tmp5 += (a3[i] - c) * (a4[i] - c);
		}

		EY /= nsample;
		EY2 /= nsample;

		double V = (tmp1 / (nsample - 1)) - Math.pow(tmp2 / nsample, 2.0);
		double Vij = (tmp3 / (nsample - 1)) - EY2;
		double Vi = (tmp4 / (nsample - 1)) - EY;
		double Vj = (tmp5 / (nsample - 1)) - EY2;

		return (Vij - Vi - Vj) / V;
	}

	/**
	 * Returns the second-order confidence interval of the i-th and j-th
	 * parameters.  The arguments to this method mirror the arguments to
	 * {@link #computeSecondOrder}.
	 * 
	 * @param a0 the output from the first independent samples
	 * @param a1 the output from the samples produced by swapping the i-th
	 *        parameter in the second independent samples with the i-th
	 *        parameter from the first independent samples
	 * @param a2 the output from the samples produced by swapping the j-th
	 *        parameter in the first independent samples with the j-th parameter
	 *        from the second independent samples
	 * @param a3 the output from the samples produced by swapping the i-th
	 *        parameter in the first independent samples with the i-th parameter
	 *        from the second independent samples
	 * @param a4 the output from the second independent samples
	 * @param nsample the number of samples
	 * @return the second-order confidence interval of the i-th and j-th
	 *         parameters
	 */
	private static double computeSecondOrderConfidence(double[] a0,
			double[] a1, double[] a2, double[] a3, double[] a4, int nsample,
			int nresample) {
		double[] b0 = new double[nsample];
		double[] b1 = new double[nsample];
		double[] b2 = new double[nsample];
		double[] b3 = new double[nsample];
		double[] b4 = new double[nsample];
		double[] s = new double[nresample];

		for (int i = 0; i < nresample; i++) {
			for (int j = 0; j < nsample; j++) {
				int index = PRNG.nextInt(nsample);

				b0[j] = a0[index];
				b1[j] = a1[index];
				b2[j] = a2[index];
				b3[j] = a3[index];
				b4[j] = a4[index];
			}

			s[i] = computeSecondOrder(b0, b1, b2, b3, b4, nsample);
		}

		double ss = StatUtils.sum(s) / nresample;
		double sss = 0.0;
		
		for (int i = 0; i < nresample; i++) {
			sss += Math.pow(s[i] - ss, 2.0);
		}

		return 1.96 * Math.sqrt(sss / (nresample - 1));
	}

	/**
	 * Ensures the model output file contains N*(2P+2) lines and returns N, the
	 * number of samples.
	 * 
	 * @param file the model output file being validated
	 * @return the number of samples
	 * @throws IOException
	 */
	private int validate(File file) throws IOException {
		MatrixReader reader = null;
		
		try {
			reader = new MatrixReader(file);
			int count = 0;

			while (reader.hasNext()) {
				if (reader.next().length > index) {
					count++;
				} else {
					break;
				}
			}

			if (count % (2 * P + 2) != 0) {
				System.err.println(file + " is incomplete");
			}

			return count / (2 * P + 2);
		} finally {
			if (reader != null) {
				reader.close();
			}
		}
	}

	@SuppressWarnings("static-access")
	@Override
	public Options getOptions() {
		Options options = super.getOptions();

		options.addOption(OptionBuilder
				.withLongOpt("parameterFile")
				.hasArg()
				.withArgName("file")
				.isRequired()
				.create('p'));
		options.addOption(OptionBuilder
				.withLongOpt("input")
				.hasArg()
				.withArgName("file")
				.isRequired()
				.create('i'));
		options.addOption(OptionBuilder
				.withLongOpt("metric")
				.hasArg()
				.withArgName("value")
				.isRequired()
				.create('m'));
		options.addOption(OptionBuilder
				.withLongOpt("simple")
				.create('s'));
		options.addOption(OptionBuilder
				.withLongOpt("output")
				.hasArg()
				.withArgName("file")
				.create('o'));
		options.addOption(OptionBuilder
				.withLongOpt("resamples")
				.hasArg()
				.withArgName("number")
				.create('r'));

		return options;
	}

	@Override
	public void run(CommandLine commandLine) throws Exception {
		PrintStream output = null;
		
		//setup the parameters
		parameterFile = new ParameterFile(new File(
				commandLine.getOptionValue("parameterFile")));
		index = Integer.parseInt(commandLine.getOptionValue("metric"));
		P = parameterFile.size();
		
		if (commandLine.hasOption("resamples")) {
			resamples = Integer.parseInt(commandLine.getOptionValue(
					"resamples"));
		}

		//load and validate the model output file
		File input = new File(commandLine.getOptionValue("input"));
		N = validate(input);
		load(input);

		try {
			//setup the output stream
			if (commandLine.hasOption("output")) {
				output = new PrintStream(new File(
						commandLine.getOptionValue("output")));
			} else {
				output = System.out;
			}
			
			//perform the Sobol analysis and display the results
			if (commandLine.hasOption("simple")) {
				displaySimple(output);
			} else {
				display(output);
			}
		} finally {
			if ((output != null) && (output != System.out)) {
				output.close();
			}
		}
	}

	/**
	 * Command line utility for global sensitivity analysis using Sobol's global
	 * variance decomposition based on Saltelli's work.
	 * 
	 * @param args the command line arguments
	 * @throws Exception if an error occurred
	 */
	public static void main(String[] args) throws Exception {
		new SobolAnalysis().start(args);
	}

}
